library(dplyr)
library(haven)

# Load the cleaned LSIC wave 1 file (Stata format) into `lsic`.

lsic_path <- "H:/Zheng_10223/Joint/LSIC/lsicwave1.dta"
lsic <- read_dta(lsic_path)


# Recode intendedocc_imdb into a new column, intocc_imdb2.
#
# `imdb_crosswalk` is a named character vector: each name is a source code as
# it appears in lsic$intendedocc_imdb, each value is the code written to
# lsic$intocc_imdb2. Every source code appears exactly once.
#
# Notes on specific entries:
#   * "NA" is the literal two-character string, not a missing value, and
#     "9980" maps to the empty string. The one-digit filter applied to dfmatch
#     later in this script drops rows whose first character is "N" or whose
#     value is "", so these sentinels must stay as they are.
#   * "9993" -> "9996" recodes imdb new workers to lsic new workers.
#   * "2243" used to be assigned twice ("C141", then "C143"); only the later
#     assignment ever took effect, so "C143" is the value kept here.
#   * NOTE(review): a few targets break the apparent letter/digit pattern of
#     their neighbours (e.g. "2145" -> "I013", "4214" -> "G813",
#     "1121" -> "B311", "3144" -> "D023"). They are reproduced unchanged;
#     confirm them against the concordance used to build this table.
imdb_crosswalk <- c(
  "9930" = "9996", "9920" = "NA",   "6271" = "G911", "4217" = "E216",
  "9970" = "NA",   "1241" = "B211", "9940" = "NA",   "0631" = "A221",
  "2132" = "C032", "7315" = "H415", "4169" = "E038", "2162" = "C062",
  "2221" = "C121", "7244" = "H214", "1112" = "B012", "1122" = "B022",
  "2121" = "C021", "6241" = "G412", "2232" = "C132", "4162" = "E032",
  "2163" = "C063", "5125" = "F025", "7221" = "H021", "1221" = "B311",
  "2133" = "C033", "2134" = "C034", "4121" = "E111", "1231" = "B111",
  "2115" = "C015", "0611" = "A131", "1433" = "B533", "1111" = "B011",
  "7311" = "H411", "2243" = "C143", "4212" = "E212", "2161" = "C061",
  "2111" = "C011", "5124" = "F024", "3223" = "D223", "6411" = "G111",
  "7212" = "H012", "2147" = "C047", "2241" = "C141", "9960" = "NA",
  "9999" = "NA",   "7381" = "H521", "9910" = "A111", "6453" = "G513",
  "5244" = "F144", "7251" = "H111", "7414" = "H714", "6242" = "G412",
  "7281" = "H131", "9451" = "J161", "9980" = "",     "6421" = "G211",
  "5111" = "F011", "4142" = "E132", "2253" = "C153", "1471" = "B571",
  "6642" = "G962", "6474" = "G814", "6221" = "G121", "2141" = "C041",
  "2225" = "C125", "2131" = "C031", "1225" = "B315", "7242" = "H212",
  "5112" = "F012", "5211" = "F111", "2148" = "C048", "9484" = "J214",
  "4163" = "E033", "9950" = "9996", "2142" = "C042", "7246" = "H216",
  "7216" = "H016", "2231" = "C131", "2112" = "C012", "1223" = "B313",
  "0111" = "A111", "7232" = "H312", "2114" = "C014", "3131" = "D031",
  "1234" = "B114", "4211" = "E211", "0122" = "A302", "2233" = "C133",
  "2151" = "C051", "2212" = "C112", "3211" = "D211", "0013" = "A113",
  "0121" = "A301", "5131" = "F031", "7241" = "H211", "6472" = "G812",
  "9415" = "J125", "1242" = "B212", "7265" = "J195", "9619" = "J319",
  "6252" = "G942", "9483" = "J213", "5123" = "F023", "7219" = "H019",
  "2123" = "C023", "3144" = "D023", "8251" = "I011", "1411" = "B511",
  "3142" = "D042", "6442" = "G722", "0621" = "A211", "4152" = "E021",
  "6233" = "G133", "4154" = "E024", "2122" = "C022", "0632" = "A222",
  "7321" = "H421", "7271" = "H121", "7231" = "H311", "3152" = "D112",
  "6473" = "G813", "1114" = "B014", "1113" = "B013", "5232" = "F132",
  "5231" = "F131", "1421" = "B521", "5221" = "F121", "2255" = "C155",
  "7294" = "H144", "1233" = "B133", "1453" = "B553", "0311" = "A321",
  "2223" = "C123", "0016" = "A016", "4122" = "E112", "2222" = "C122",
  "8222" = "I122", "2211" = "C111", "1222" = "B312", "3411" = "D311",
  "0131" = "A311", "5243" = "F143", "6431" = "G711", "6232" = "G132",
  "7445" = "H535", "6661" = "G931", "8422" = "I162", "6261" = "G611",
  "0015" = "A015", "4141" = "E131", "2242" = "C142", "3212" = "D212",
  "2113" = "C013", "0211" = "A121", "5241" = "F141", "5242" = "F142",
  "2144" = "C044", "2264" = "C164", "7245" = "H215", "3215" = "D215",
  "3114" = "D014", "0911" = "A391", "2146" = "C046", "2252" = "C152",
  "7312" = "H412", "6641" = "G961", "7371" = "H621", "4151" = "E021",
  "2145" = "I013", "2272" = "C172", "4143" = "E133", "3232" = "D232",
  "6443" = "G731", "7611" = "H821", "3413" = "D312", "1226" = "B316",
  "5133" = "F033", "5136" = "F036", "5225" = "F125", "3111" = "D011",
  "1434" = "B534", "7383" = "H483", "5254" = "F154", "5252" = "F152",
  "1475" = "B575", "1243" = "B213", "6212" = "G012", "7413" = "H713",
  "1431" = "B531", "6231" = "G131", "1414" = "B514", "6441" = "G721",
  "6432" = "G712", "4164" = "E034", "2143" = "C043", "0114" = "A114",
  "3235" = "D235", "3141" = "D041", "4153" = "E023", "2244" = "C144",
  "2271" = "C171", "7351" = "H451", "3221" = "D221", "5226" = "F126",
  "8431" = "I021", "3112" = "D012", "1228" = "B318", "0713" = "A373",
  "1232" = "B112", "5135" = "F035", "1441" = "B541", "4214" = "G813",
  "7272" = "H122", "0811" = "A381", "1121" = "B311", "8254" = "I014",
  "6631" = "G951", "7352" = "H222", "7322" = "H422", "1432" = "B532",
  "5222" = "F122", "5122" = "F022", "0721" = "A141", "4166" = "E035",
  "3231" = "D231", "3143" = "D043", "3213" = "D213", "3222" = "D222",
  "1442" = "B542", "9462" = "J172", "9441" = "J151", "9511" = "J191",
  "9414" = "J124", "7342" = "H512", "8253" = "I013", "7344" = "H514",
  "3122" = "D022", "2263" = "C163", "1235" = "B115", "0711" = "A371",
  "0213" = "A122", "0014" = "A114", "7411" = "H711", "4161" = "E031",
  "7333" = "H433", "6621" = "G971", "6651" = "G631", "2234" = "C131",
  "2154" = "C054", "3216" = "D216", "6482" = "G922", "7313" = "H413",
  "0651" = "A361", "6663" = "G993", "4112" = "E012", "9993" = "9996"
)

# A repeated source code would make the lookup ambiguous (only the first
# entry would ever be used), so fail loudly if one is introduced by mistake.
stopifnot(anyDuplicated(names(imdb_crosswalk)) == 0L)

# Show the distinct source codes present in the data.
unique(lsic$intendedocc_imdb)

# Vectorized lookup: each row receives the target for its source code, or NA
# when the source code is missing or has no entry in the crosswalk.
imdb_codes <- as.character(lsic$intendedocc_imdb)
lsic$intocc_imdb2 <- unname(
  imdb_crosswalk[match(imdb_codes, names(imdb_crosswalk))]
)

# Codes without a crosswalk entry end up as NA in intocc_imdb2 and are lost
# from the analysis further down; report them instead of dropping silently.
unmapped_codes <- setdiff(
  unique(imdb_codes[!is.na(imdb_codes)]),
  names(imdb_crosswalk)
)
if (length(unmapped_codes) > 0) {
  warning(
    "intendedocc_imdb codes with no recode (intocc_imdb2 left NA): ",
    paste(sort(unmapped_codes), collapse = ", "),
    call. = FALSE
  )
}


# Working table for the match analysis: household id, the occupation code
# columns and the survey weight.
match_cols <- c(
  "HHLDID", "pastocc4", "intendedocc_lsic4",
  "intocc_imdb2", "intendedocc_imdb", "weight"
)
dfmatch <- lsic[, match_cols]

# Prefix columns: the first two characters of each occupation code, then the
# first character of each occupation code.
dfmatch <- dfmatch %>%
  mutate(
    pastocc2digit = substr(pastocc4, 1, 2),
    intocclsic2digit = substr(intendedocc_lsic4, 1, 2),
    intoccimdb2digit = substr(intocc_imdb2, 1, 2),
    pastocc1digit = substr(pastocc4, 1, 1),
    intocclsic1digit = substr(intendedocc_lsic4, 1, 1),
    intoccimdb1digit = substr(intocc_imdb2, 1, 1)
  )


# Weighted counts of the special codes in the two survey occupation columns.
# NOTE(review): these look like not-applicable / non-response codes; confirm
# their meaning against the codebook.
special_codes <- c("0000", "9996", "9997", "9998", "9999")

# Past occupation
dfmatch %>%
  filter(pastocc4 %in% special_codes) %>%
  count(pastocc4, wt = weight)

# Intended occupation (lsic)
dfmatch %>%
  filter(intendedocc_lsic4 %in% special_codes) %>%
  count(intendedocc_lsic4, wt = weight)


# Combined "survey occupation" measures, built by comparing the survey codes
# with the recoded imdb intended occupation. ifelse() yields NA wherever
# either side of the comparison is NA.

# past_int1occ / past_int1occ2: use the lsic intended occupation when it
# agrees with the imdb intended occupation, otherwise fall back to the past
# occupation (one-character and two-character level respectively).
dfmatch$past_int1occ <- ifelse(
  dfmatch$intocclsic1digit == dfmatch$intoccimdb1digit,
  dfmatch$intocclsic1digit,
  dfmatch$pastocc1digit
)
dfmatch$past_int1occ2 <- ifelse(
  dfmatch$intocclsic2digit == dfmatch$intoccimdb2digit,
  dfmatch$intocclsic2digit,
  dfmatch$pastocc2digit
)

# past_int2occ / past_int2occ2: use the past occupation when it agrees with
# the imdb intended occupation, otherwise fall back to the lsic intended
# occupation (one-character and two-character level respectively).
dfmatch$past_int2occ <- ifelse(
  dfmatch$pastocc1digit == dfmatch$intoccimdb1digit,
  dfmatch$pastocc1digit,
  dfmatch$intocclsic1digit
)
# The two-character variant previously returned the one-character columns,
# mixing levels within one variable; it now returns two-character values,
# matching past_int1occ2.
dfmatch$past_int2occ2 <- ifelse(
  dfmatch$pastocc2digit == dfmatch$intoccimdb2digit,
  dfmatch$pastocc2digit,
  dfmatch$intocclsic2digit
)

# Remove records whose past occupation code is "0000".
has_past_occ <- dfmatch$pastocc4 != "0000"
dfmatch <- dfmatch[has_past_occ, ]

# Keep rows with a usable imdb category (first character neither "N" nor
# empty) and a combined occupation that is present and does not start with
# "0" or "9".
dfmatch <- dfmatch %>%
  filter(
    intoccimdb1digit != "N",
    intoccimdb1digit != "",
    past_int1occ != "0",
    past_int1occ != "9",
    !is.na(past_int1occ)
  )

# Weighted cross-tabulation of imdb category by combined survey category.
crosstab <- count(dfmatch, intoccimdb1digit, past_int1occ, wt = weight)


# Display names for the one-letter occupation categories.
occ_labels <- c(
  A = "Management",
  B = "Business, Finance, Admin",
  C = "Natural, Applied Sciences",
  D = "Health",
  E = "Social Sciences, Educ.",
  F = "Art, Culture",
  G = "Sales, Service",
  H = "Trades, Transport",
  I = "Primary Industry",
  J = "Processing, Manufac., Utilities"
)

# The imdb side additionally recognises "N" (new worker); any category
# without an entry is labelled NA.
intocc_labels <- c(occ_labels, N = "New Worker")
crosstab$intocc_name <- unname(intocc_labels[crosstab$intoccimdb1digit])

crosstab$pastint_name <- unname(occ_labels[crosstab$past_int1occ])

View(crosstab)


# Total weighted count per imdb intended-occupation category, used to see
# which categories are the most populated.

by_intended <- group_by(crosstab, intoccimdb1digit)
onedigit <- summarize(by_intended, totalonedigit = sum(n))

library(ggplot2)
library(digest)

# Within each imdb category: total weighted count (totalrow), each cell's
# share of that total (propintoccimdb1, copied to Match for plotting), and,
# after dropping category "9", the sum of the shares (totalval).
# The result stays grouped by intoccimdb1digit.
crosstab <- crosstab %>%
  group_by(intoccimdb1digit) %>%
  mutate(
    totalrow = sum(n),
    propintoccimdb1 = n / totalrow,
    Match = propintoccimdb1
  ) %>%
  filter(intoccimdb1digit != "9") %>%
  mutate(totalval = sum(Match))


head(crosstab)




# Heat map of match shares: imdb (landing file) category on the x axis,
# combined survey category on the y axis, tile shade = Match.
# The legend title is set through the `fill` aesthetic, because that is the
# aesthetic the tiles are mapped to; titling `colour` has no effect here.
ggplot(
  data = data.frame(crosstab),
  aes(x = intocc_name, y = pastint_name, fill = Match)
) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red") +
  labs(x = "Landing File", y = "Survey Occupation", fill = "Match Share") +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(angle = 45, hjust = 0.95, vjust = 1)
  )

